The file contains the source code of the data applications of the trading strategies presented in Dynamic Data Science Applications in Optimal Profit Algorithmic Trading.

Load required packages

pkg_list = c('quantmod', 'TTR', 'zoo', 'tseries', 'fGarch','PEIP','gdata',
             'gridExtra','tidyverse', 'aTSA', 'dygraphs', 'urca')
# Function to install required packages if needed
for (pkg in pkg_list)
{
  # Loading the library.
  if (!library(pkg, logical.return=TRUE, character.only=TRUE))
    {
         # If the library cannot be loaded, install first and then load.
        install.packages(pkg)
        library(pkg, character.only=TRUE)
  }
}

Load the required stocks

start.date = '2017-2-1' # starting date of stock
end.date = '2019-3-17' # ending date of stock
# Download the selected stocks from Yahoo finance
getSymbols(c('SPY','ADBE','EBAY','MSFT','IBM','GLD','GDX','EWA','EWC','IGE'), 
                      src = "yahoo", from = start.date, to = end.date)
##  [1] "SPY"  "ADBE" "EBAY" "MSFT" "IBM"  "GLD"  "GDX"  "EWA"  "EWC"  "IGE"
stocks <-merge(SPY = SPY[, "SPY.Adjusted"], ADBE = ADBE[, "ADBE.Adjusted"], 
               EBAY= EBAY[, "EBAY.Adjusted"], MSFT = MSFT[, "MSFT.Adjusted"], 
               IBM = IBM[, "IBM.Adjusted"], GLD = GLD[, "GLD.Adjusted"], 
               GDX = GDX[, "GDX.Adjusted"], EWA = EWA[, "EWA.Adjusted"], 
               EWC = EWC[, "EWC.Adjusted"], IGE = IGE[, "IGE.Adjusted"])
head(stocks)
##            SPY.Adjusted ADBE.Adjusted EBAY.Adjusted MSFT.Adjusted
## 2017-02-01     213.7879        113.36      31.55644      60.07822
## 2017-02-02     213.9288        113.16      31.35050      59.69079
## 2017-02-03     215.4034        115.17      31.44856      60.17270
## 2017-02-06     215.0183        114.46      31.40934      60.13491
## 2017-02-07     215.0277        114.96      31.80159      59.93647
## 2017-02-08     215.3095        116.13      32.60570      59.85144
##            IBM.Adjusted GLD.Adjusted GDX.Adjusted EWA.Adjusted
## 2017-02-01     149.5858       115.20     23.36890     18.50433
## 2017-02-02     149.8347       115.84     23.88821     18.69501
## 2017-02-03     150.8990       116.13     24.00579     18.71234
## 2017-02-06     150.9333       117.70     24.88764     18.53900
## 2017-02-07     153.1648       117.46     24.77986     18.49566
## 2017-02-08     152.3949       118.19     25.05421     18.59100
##            EWC.Adjusted IGE.Adjusted
## 2017-02-01     25.32722     31.71767
## 2017-02-02     25.39264     31.88734
## 2017-02-03     25.48610     32.13736
## 2017-02-06     25.30852     31.85162
## 2017-02-07     25.23376     31.38728
## 2017-02-08     25.32722     31.51230

Function to select cointegrated pairs

find_cointegrated_pairs <- function (data){
  n <- ncol (data)
  pvalue_matrix <- matrix(0, nrow=n, ncol=n)
  pairs <- list()
  m <- 1
  for (i in 1:n){
    for (j in 1:n){
      if(i>=j) { 
        next;
      } else{
      S1 <- data[, i]
      S2 <- data[, j]
      result <- coint.test (as.numeric(S1), as.numeric(S2), output = FALSE)
      pvalue_matrix [i, j] <- result[,3][[1]]
      if (result[,3][[1]] < 0.05){
        pairs [[m]] <- c (i, j)
        m <- m+1}
      }
    }
  }
  newlist <- list(pvalue_matrix, pairs)
  return (newlist)
}

Calculate p-value to identify cointegrated pairs

pvalue<-find_cointegrated_pairs(stocks)
#round(pvalue[[1]],3)

Plot the stock prices and test for multiple cointegration

assets <- c("EWC", "IGE") # selected two assets
pair.stock <- merge(stocks[, 9], stocks[, 10], join="inner")
colnames(pair.stock) <- assets
# Plot the assets 
plot(pair.stock, legend.loc=1)

# Test of multiple cointegration
jotest=ca.jo(pair.stock, type="trace", K=2, ecdet="none", spec="longrun")
summary(jotest)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.013169621 0.009127074
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 1 |  4.87  6.50  8.18 11.65
## r = 0  | 11.91 15.66 17.95 23.52
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##            EWC.l2     IGE.l2
## EWC.l2 1.00000000  1.0000000
## IGE.l2 0.01218259 -0.7776555
## 
## Weights W:
## (This is the loading matrix)
## 
##             EWC.l2     IGE.l2
## EWC.d -0.013956528 0.01218314
## IGE.d -0.005642251 0.02946432

Implementation of Non-Gaussian filter algorithm for two stocks

# select pairwise cointegrated stocks
x <- pair.stock[, 1]
y <- pair.stock[, 2]
x$intercept <- rep(1, nrow(x)) # create intercept
var_e <- 0.0001 # innovation covariance of observation
sigma_v <- var_e/(1-var_e)*diag(2) #covariance matrix of state
Ve <- 0.001
P_t <- 10^{-10}*diag(2)
P <- matrix(rep(0, 4), nrow=2)
I_t <- matrix(rep(0, 4), nrow=2) #information matrix
beta <- matrix(rep(0, nrow(y)*2), ncol=2)
y_fitted <- rep(0, nrow(y))
nu <- rep(0, nrow(y))
Q <- rep(0, nrow(y))
################################################################
# Function to implement the Non-Gaussian maximum filter operations
################################################################
kalman_iteration <- function(y, x) {
for(i in 1:nrow(y)) {
  if(i > 1) {
    beta[i, ] <- beta[i-1, ] # state transition
    P_t <- P + sigma_v # state covariance prediction
    }
  y_fitted[i] <- x[i, ] %*% beta[i, ] # observation prediction
  Q[i] <- x[i, ] %*% P_t %*% t(x[i, ]) + Ve # observation variance prediction
  nu[i] <- y[i] - y_fitted[i] # prediction error
  K_gain <- P_t %*% t(x[i, ]) / Q[i] # information gain
  # updating the state
  beta[i, ] <- beta[i, ] + K_gain * nu[i]
  I_t <- inv(P_t)+ t(x[i, ])%*%x[i, ] / Ve 
  P <- inv(I_t)
  
}
  return(list(beta, P, Q, nu))
}

res <- kalman_iteration(y,x) # Implementation of function
# Extract results
beta <- xts(res[[1]], order.by=index(pair.stock))
plot(beta[2:nrow(beta), 1],type='l',main ='Dynamic hedge ratio',col = "blue")

plot(beta[2:nrow(beta), 2],type='l',main ='Dynamic intercept',col = "blue")

# plot trade signals
nu <- xts(res[[4]], order.by=index(pair.stock))
sqrtQ <- xts(sqrt(res[[3]]), order.by=index(pair.stock))
signals <- merge(nu, sqrtQ, -sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")

Functions to calculate the optimal value of threshold \(p\), which maximizes the Sharpe ratio (SR) and cumulative profit

#############################################################
# Required functions to calculate p and profit
#############################################################
# Function to generate positions and calculate profit and loss for multiple stocks
PnL<-function(signals,nu,beta,x,y){
len <- length(index(signals)) 
vec.sig<-ifelse((signals[1:len]$nu > signals[1:len]$sqrtQ) & 
                  (lag.xts(signals$nu, 1) < lag.xts(signals$sqrtQ, 1)), -1, 
         ifelse((signals[1:len]$nu < signals[1:len]$negsqrtQ) & 
                 (lag.xts(signals$nu, 1) > lag.xts(signals$negsqrtQ, 1)), 1, 0))
colnames(vec.sig) <- "vectorise.signals"
# getting only the first signals
vec.sig[vec.sig == 0] <- NA # replace 0 by NA
vec.sig <- na.locf(vec.sig) # replace the missing values by last real observations
vec.sig <- diff(vec.sig)/2
# generate positions and calculate profit for two stocks
if(ncol(beta)==2){
sim <- merge(lag.xts(vec.sig,1), beta[, 1], x[, 1], y)
colnames(sim) <- c("sig", "hedge", assets[1], assets[2])
sim$posX <- sim$sig * -1000 * sim$hedge
sim$posY <- sim$sig * 1000   
sim$posX[sim$posX == 0] <- NA
sim$posX <- na.locf(sim$posX)
sim$posY[sim$posY == 0] <- NA
sim$posY <- na.locf(sim$posY)
PLX <- sim$posX * diff(sim[, assets[1]])
PLY <- sim$posY * diff(sim[, assets[2]])
profit_loss <- PLX + PLY
  }
# generate positions and calculate profit for three stocks
if(ncol(beta)==3){
sim <- merge(lag.xts(vec.sig,1), beta[, 1], beta[, 2], x[, 1], x[, 2], y)
colnames(sim) <- c("sig", "hedge1", "hedge2", assets[1], assets[2], assets[3])
sim$posX1 <- sim$sig * -1000 * sim$hedge1
sim$posX2 <- sim$sig * -1000 * sim$hedge2
sim$posY <- sim$sig * 1000   
sim$posX1[sim$posX1 == 0] <- NA
sim$posX1 <- na.locf(sim$posX1)
sim$posX2[sim$posX2 == 0] <- NA
sim$posX2 <- na.locf(sim$posX2)
sim$posY[sim$posY == 0] <- NA
sim$posY <- na.locf(sim$posY)
PLX <- sim$posX1 * diff(sim[, assets[1]]) + sim$posX2 * diff(sim[, assets[2]])
PLY <- sim$posY * diff(sim[, assets[3]])
profit_loss <- PLX + PLY
   }
# generate positions and calculate profit for four stocks
if(ncol(beta)==4){
sim <- merge(lag.xts(vec.sig,1), beta[,1], beta[,2], beta[,3], x[,1], x[,2], x[,3], y)
colnames(sim) <- c("sig", "hedge1", "hedge2", "hedge3",assets[1],assets[2], 
                                        assets[3], assets[4])
sim$posX1 <- sim$sig * -1000 * sim$hedge1
sim$posX2 <- sim$sig * -1000 * sim$hedge2
sim$posX3 <- sim$sig * -1000 * sim$hedge3
sim$posY <- sim$sig * 1000   
sim$posX1[sim$posX1 == 0] <- NA
sim$posX1 <- na.locf(sim$posX1)
sim$posX2[sim$posX2 == 0] <- NA
sim$posX2 <- na.locf(sim$posX2)
sim$posX3[sim$posX3 == 0] <- NA
sim$posX3 <- na.locf(sim$posX3)
sim$posY[sim$posY == 0] <- NA
sim$posY <- na.locf(sim$posY)
PLX <- sim$posX1 * diff(sim[, assets[1]]) + sim$posX2 * diff(sim[, assets[2]]) + 
                              sim$posX3 * diff(sim[, assets[3]])
PLY <- sim$posY * diff(sim[, assets[4]])
profit_loss <- PLX + PLY
    }
return(ProfitLoss=profit_loss)
}
# Functions to calculate the optimal value of threshold, $p$
SR.train<-function(nu, sqrtQ, p){
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
# Implementation of profit
profit.loss<- PnL(signals,nu,beta,x,y)
st_p <- sqrt(252)*mean(na.omit(profit.loss))/sd(na.omit(profit.loss))
return (st_p)
} 
# determining optimal p to maximize Sharpe ratio 
p <- seq(0.1, 2, 0.01)
SR<-0
for(j in 1:length(p)){
      SR[j] <- SR.train (nu, sqrtQ, p[j])
    }
max(na.omit(SR))
## [1] 1.370774
plot(p, SR, type = "l", col = "blue")

## Calcualte the optimal value of p by maximizing the SR

p.opt<-p[which.max(SR)]
p.opt
## [1] 1.34
# create optimal trading signals
p <- p.opt
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
plot(signals[3:length(index(signals))], ylab='nu', main = 'Trading signals', 
              col=c('blue', 'red', 'red'), lwd=c(1,2,2))

## Calculate the cumulative profit using optimal trading signals

# Implementation of profit and loss fucntion to calculate cumulative profit
profit.loss<- PnL(signals,nu,beta,x,y)
sum (na.omit(profit.loss))
## [1] 9340.675
plot(cumsum(na.omit(profit.loss)), main="Cumulative profit, $", col = "blue")

Implementation of Non-Gaussian filter for multiple trading (three stocks)

assets <- c("EWA", "EWC", "IGE")
three.stocks <- merge(stocks[, 8], stocks[, 9], stocks [, 10])
colnames(three.stocks) <- assets
plot(three.stocks, legend.loc=1)

jotest=ca.jo(three.stocks, type="trace", K=2, ecdet="none", spec="longrun")
summary(jotest)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.038080520 0.011171405 0.009036416
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 2 |  4.82  6.50  8.18 11.65
## r <= 1 | 10.79 15.66 17.95 23.52
## r = 0  | 31.40 28.71 31.52 37.22
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##             EWA.l2     EWC.l2    IGE.l2
## EWA.l2  1.00000000  1.0000000  1.000000
## EWC.l2 -0.47601178 -1.2951450 -3.893482
## IGE.l2 -0.01591207 -0.2048727  2.219207
## 
## Weights W:
## (This is the loading matrix)
## 
##            EWA.l2      EWC.l2       IGE.l2
## EWA.d -0.07058604 0.005843659 -0.003161974
## EWC.d -0.01404794 0.014292636 -0.003131786
## IGE.d  0.01849513 0.011053309 -0.009246670

Implementation of Non-Gaussian filter algorithm

x <- three.stocks[, 1:2]
y <- three.stocks[, 3]
x$intercept <- rep(1, nrow(x)) # create intercept
var_e <- 0.0001 # innovation covariance of observation
sigma_v <- var_e/(1-var_e)*diag(3) #covariance matrix of state
Ve <- 0.001
P_t <- 10^{-10}*diag(3)
P <- matrix(rep(0, 4), nrow=3)
## Warning in matrix(rep(0, 4), nrow = 3): data length [4] is not a sub-
## multiple or multiple of the number of rows [3]
I_t <- matrix(rep(0, 4), nrow=3) #information matrix
## Warning in matrix(rep(0, 4), nrow = 3): data length [4] is not a sub-
## multiple or multiple of the number of rows [3]
beta <- matrix(rep(0, nrow(y)*3), ncol=3)
y_fitted <- rep(0, nrow(y))
nu <- rep(0, nrow(y))
Q <- rep(0, nrow(y))
# Implementation of function of iterative Non-Gaussian filter operations
res <- kalman_iteration(y,x)
# Extract results
beta <- xts(res[[1]], order.by=index(three.stocks))
plot(beta[2:nrow(beta), 1:2], type='l', main = 'Dynamic hedge ratios', 
     col = c("blue", "green"))

plot(beta[2:nrow(beta), 3], type='l', main = 'Dynamic updated intercept', col = "blue")

# plot trade signals
nu <- xts(res[[4]], order.by=index(three.stocks))
sqrtQ <- xts(sqrt(res[[3]]), order.by=index(three.stocks))
signals <- merge(nu, sqrtQ, -sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
# determining optimal p to maximize Sharpe ratio 
p <- seq(0.1, 2, 0.01)
SR<-0
for(j in 1:length(p)){
      SR[j] <- SR.train (nu, sqrtQ, p[j])
    }
max(na.omit(SR))
## [1] 1.581271
plot(p, SR, type = "l", col = "blue", ylab = "Annualized SR")

## Calcualte the optimal value of p by maximizing the SR

p.opt<-p[which.max(SR)]
p.opt
## [1] 1.02
# create optimal trading signals
p <- p.opt
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
plot(signals[3:length(index(signals))], ylab='nu', main = 'Trading signals', 
                col=c('blue', 'red', 'red'), lwd=c(1,2,2))

## Calculate the cumulative profit using optimal trading signals

# Implementation of profit and loss fucntion to calculate cumulative profit
profit.loss<- PnL(signals,nu,beta,x,y)
sum (na.omit(profit.loss))
## [1] 9713.534
plot(cumsum(na.omit(profit.loss)), main="Cumulative profit, $", col = "blue")

Implementation of Non-Gaussian filter for multiple trading (four stocks)

Plot and test for the presence of cointegration between stocks

assets <- c("GDX", "EWA", "EWC", "IGE")
four.stocks <- merge(stocks[, 7], stocks[, 8], stocks[, 9], stocks [, 10])
colnames(four.stocks) <- assets
plot(four.stocks, legend.loc=1)

jotest=ca.jo(four.stocks, type="trace", K=2, ecdet="none", spec="longrun")
summary(jotest)
## 
## ###################### 
## # Johansen-Procedure # 
## ###################### 
## 
## Test type: trace statistic , with linear trend 
## 
## Eigenvalues (lambda):
## [1] 0.045577162 0.021370707 0.011810312 0.009079026
## 
## Values of teststatistic and critical values of test:
## 
##           test 10pct  5pct  1pct
## r <= 3 |  4.84  6.50  8.18 11.65
## r <= 2 | 11.15 15.66 17.95 23.52
## r <= 1 | 22.62 28.71 31.52 37.22
## r = 0  | 47.39 45.23 48.28 55.43
## 
## Eigenvectors, normalised to first column:
## (These are the cointegration relations)
## 
##              GDX.l2     EWA.l2    EWC.l2     IGE.l2
## GDX.l2   1.00000000  1.0000000  1.000000   1.000000
## EWA.l2 -19.53896573 -0.4622338  3.276195   7.277618
## EWC.l2   9.13739689  0.5825642 -2.241764 -20.127084
## IGE.l2   0.02478951  0.2101317 -1.271165   8.400918
## 
## Weights W:
## (This is the loading matrix)
## 
##              GDX.l2        EWA.l2      EWC.l2        IGE.l2
## GDX.d  0.0026561203 -0.0252144712 0.001788170 -0.0005459360
## EWA.d  0.0040943496  0.0023533474 0.002981879 -0.0004305856
## EWC.d  0.0012926404 -0.0006573674 0.006025750 -0.0001939488
## IGE.d -0.0002119599  0.0011256165 0.007472117 -0.0016118821

Implementation of Non-Gaussian filter algorithm

x <- four.stocks[, 1:3]
y <- four.stocks[, 4]
x$intercept <- rep(1, nrow(x)) # create intercept
var_e <- 0.0001 # innovation covariance of observation
sigma_v <- var_e/(1-var_e)*diag(4) #covariance matrix of state
Ve <- 0.001
P_t <- 10^{-10}*diag(4)
P <- matrix(rep(0, 4), nrow=4)
I_t <- matrix(rep(0, 4), nrow=4) #information matrix
beta <- matrix(rep(0, nrow(y)*4), ncol=4)
y_fitted <- rep(0, nrow(y))
nu <- rep(0, nrow(y))
Q <- rep(0, nrow(y))
# Implementation of function of iterative Non-Gaussian filter operations
res <- kalman_iteration(y,x)
# Extract results
beta <- xts(res[[1]], order.by=index(four.stocks))
plot(beta[2:nrow(beta), 1:3], type='l', main = 'Dynamic updated hedge ratios', 
     col = c("blue", "green", "red"))

plot(beta[2:nrow(beta), 4], type='l', main = 'Dynamic updated intercept', col = "blue")

# plot trading signals
nu <- xts(res[[4]], order.by=index(four.stocks))
sqrtQ <- xts(sqrt(res[[3]]), order.by=index(four.stocks))
signals <- merge(nu, sqrtQ, -sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
# determining optimal p to maximize Sharpe ratio 
p <- seq(0.1, 2, 0.01)
SR<-0
for(j in 1:length(p)){
      SR[j] <- SR.train (nu, sqrtQ, p[j])
    }
max(na.omit(SR))
## [1] 1.776355
plot(p, SR, type = "l", col = "blue", ylab = "Annualized SR")

## Calcualte the optimal value of p by maximizing the SR

p.opt<-p[which.max(SR)]
p.opt
## [1] 0.84
# create optimal trading signals
p <- p.opt
signals <- merge(nu, p*sqrtQ, -p*sqrtQ)
colnames(signals) <- c("nu", "sqrtQ", "negsqrtQ")
plot(signals[3:length(index(signals))], ylab='nu', main = 'Trading signals', 
                col=c('blue', 'red', 'red'), lwd=c(1,2,2))

## Calculate the cumulative profit using optimal trading signals

# Implementation of profit and loss fucntion to calculate cumulative profit
profit.loss<- PnL(signals,nu,beta,x,y)

plot(cumsum(na.omit(profit.loss)), main="Cumulative profit, $", col = "blue")

sum (na.omit(profit.loss))
## [1] 13334.45

Buy and hold strategy

maxx<- nrow (stocks)
GDX <- as.numeric(stocks [maxx, 7]) - as.numeric(stocks [1, 7])
EWA <- as.numeric(stocks [maxx, 8]) - as.numeric(stocks [1, 8])
EWC <- as.numeric(stocks [maxx, 9]) - as.numeric(stocks [1, 9])
IGE <- as.numeric(stocks [maxx, 10]) - as.numeric(stocks [1, 10])
1000*(EWC + IGE)
## [1] -778.936
1000*(EWA + EWC + IGE)
## [1] 1319.735
1000*(GDX + EWA + EWC + IGE)
## [1] 93.836